*--- Import the raw price-paid CSV
* Only columns 2-10 of the file are read. The statements below refer to the
* imported columns as v1-v9.
* Fix: the global macro reference was missing its closing brace, so the
* directory stored in datadir_landreg was never substituted into the path.
import delimited "${datadir_landreg}\pp-complete-Mar18.csv", colrange(2:10) clear

ren v1 price
ren v3 postcode

* 0/1 indicators. Both are stored as byte so the two flags are consistent;
* the values are unchanged (1 when the code matches, 0 otherwise).
* new   = 1 when v5 is "Y" (presumably the new-build flag; confirm with the
*         data dictionary)
* lease = 1 when v6 is "L" (presumably leasehold tenure; confirm)
gen byte new = v5 == "Y"

gen byte lease = v6 == "L"


*--- Dates
* v2 is parsed with the mask YMDhm (year, month, day, hour, minute).
* The clock value is converted to a daily date inside a single expression.
* Expressions are evaluated in double precision, so the millisecond value is
* never stored in a float variable (the default type for generate). Storing
* it in a float rounds it by up to about a minute, which can push a midnight
* timestamp onto the previous day once dofc() is applied.
gen long dated = dofc(clock(v2, "YMDhm"))
format dated %td

* Monthly, quarterly and calendar-year versions of the sale date
g datem = mofd(dated)
format datem %tm
g dateq = qofd(dated)
format dateq %tq
g year = year(dated)


*--- Units for repeat sales indices
* Give the three address columns readable names.
rename (v7 v8 v9) (paon saon street)

* One integer id per distinct (postcode, paon, saon, street) combination.
* The missing option keeps observations with missing address parts in the
* grouping instead of leaving their id missing.
egen long unit_id = group(postcode paon saon street), missing // 222s

* Side extract: observations flagged new with year > 2012. The full dataset
* is put back by restore afterwards.
* Note: a missing year also satisfies year > 2012 in Stata.
preserve
	keep if new & year > 2012
	save "0 - Shared appreciation\data\landreg_post2012_new", replace
restore

* Drop the raw columns and the address parts that are no longer needed, then
* save the slimmed-down working file.
drop saon paon street v2 v4 v5 v6
save "0 - Shared appreciation\data\landreg_selected_vars", replace
// 47 sec



*--- Exclude Wales
* The lookup file is keyed on postcode6, so the postcode column is renamed to
* match before merging. Only master observations are kept (matched or not);
* lookup-only rows are discarded and no _merge variable is left behind.
rename postcode postcode6
merge m:1 postcode6 using ///
	"0 - Shared appreciation\data\la_region_postcode_lookup", ///
	keep(master match) nogenerate

* Observations without a region are removed. Presumably these are the Welsh
* postcodes (plus any postcode absent from the lookup); TODO confirm.
tabulate regionname, missing
keep if !missing(regionname)
drop if year == 1994

// for regressions later
generate lprice = ln(price)

* NOTE(review): the path starts with the literal folder name datadir, not a
* global macro; confirm that this is intended.
save "datadir\landreg_selected_no_Wales", replace

	
*--- Only properties that started as new builds
* prevnew is 1 for every observation of a unit that has at least one
* observation with new equal to 1, and 0 otherwise.
by unit_id, sort: egen prevnew = max(new)
drop if !prevnew

* Within each unit, keep only the two earliest observations by date.
by unit_id (dated), sort: drop if _n > 2

* NOTE(review): the path starts with the literal folder name datadir, not a
* global macro; confirm that this is intended.
save "datadir\landreg_selected_no_Wales_prevnew", replace
	

*--- All properties with sale after 2012 and at least two sales
* Fix: use does not accept a replace option; clear is the option that
* discards the data in memory before loading the file.
* NOTE(review): the path starts with the literal folder name datadir, not a
* global macro; confirm that this is intended.
use "datadir\landreg_selected_no_Wales", clear

* flag counts the additional observations sharing the same unit_id, so
* flag == 0 marks units that appear only once; those are dropped.
duplicates tag unit_id, gen(flag)
drop if flag == 0

save ///
	"0 - Shared appreciation\data\landreg_no_Wales_twosales", ///
	replace


* post2012 is 1 for every observation of a unit with at least one observation
* where year > 2012 (a missing year also satisfies that comparison).
* NOTE(review): the variable is only created here. No observations are
* dropped on it before the save, although the section header and the file
* name suggest a filter; confirm whether keep if post2012 is wanted.
bysort unit_id: egen post2012 = max(year > 2012)

save "datadir\landreg_no_Wales_onepost2012", replace